1. Setting up environment

1.1 Set working directory

## Folder holding the workshop files; the "Save network" step writes
## "CML_network.csv" into the working directory. The path below is the
## author's machine - change it to a folder on your own computer.
workshop_dir <- "C:/Users/leich/Dropbox (Cambridge University)/LabRetreat_Han2023"
if (dir.exists(workshop_dir)) {
  setwd(workshop_dir)
} else {
  ## Do not abort the whole session just because the folder is missing.
  message("Workshop folder not found; staying in ", getwd())
}

1.2 Load required libraries.

(You should have these libraries installed before the workshop session. If not, please ask for help.) Required packages: igraph, dplyr, data.table and remotes.

# igraph for network analysis
library(igraph)
## Warning: package 'igraph' was built under R version 4.2.3
## 
## Attaching package: 'igraph'
## The following objects are masked from 'package:stats':
## 
##     decompose, spectrum
## The following object is masked from 'package:base':
## 
##     union
# dplyr for manipulating data table
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.2.3
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:igraph':
## 
##     as_data_frame, groups, union
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# data.table for reading data
library(data.table)
## 
## Attaching package: 'data.table'
## The following objects are masked from 'package:dplyr':
## 
##     between, first, last

2. Read in dataset

2.1 MalaCard CML dataset from GitHub

## Location of the MalaCard CML gene table (CSV hosted on GitHub)
CMLfile <- "https://raw.githubusercontent.com/chengwailei/YUHS_NetworkAnalysis/main/data/CML_MalaCard_Gene.csv"

## Download the CSV and parse it into a data frame
CML_keygenes_df <- read.csv(file = CMLfile)

rmarkdown::paged_table(CML_keygenes_df) ## dont run this
# run this:
# CML_keygenes_df

2.2 STRING interactome dataset from STRING

## STRING v12.0 physical-links file for organism 9606 (gzipped, space-separated)
STRINGfile <- "https://stringdb-downloads.org/download/protein.physical.links.v12.0/9606.protein.physical.links.v12.0.txt.gz"

## Read with fread (first row is the header), then convert the result to a
## plain data frame
interactome_df <- as.data.frame(
  fread(STRINGfile, header = TRUE, sep = " ")
)

rmarkdown::paged_table(interactome_df) ## dont run this
# run this:
# interactome_df

2.3 STRING protein alias dataset

STRINGAliasFile = "https://stringdb-downloads.org/download/protein.aliases.v12.0/9606.protein.aliases.v12.0.txt.gz"

## fill = TRUE lets fread cope with rows that have fewer fields than the rest;
## the result is converted to a plain data frame
alias_df <- as.data.frame(
  fread(STRINGAliasFile, fill = TRUE, sep = "\t")
)

## Rename the columns to the names used in the rest of the tutorial
names(alias_df) <- c("protein_name", "alias", "source")

rmarkdown::paged_table(alias_df) ## dont run this
# run this:
# alias_df

3. Define the genes of interest

We are interested in protein-coding genes in the MalaCard gene set that are likely pathogenic.

## Keep genes that are protein coding AND whose evidence is "Diseases_inferred".
## %in% is used instead of == so that a missing Category/Evidence value counts
## as "no match" rather than producing an NA row (and an NA gene symbol).
is_protein_coding <- CML_keygenes_df$Category %in% "Protein Coding"
is_disease_inferred <- CML_keygenes_df$Evidence %in% "Diseases_inferred"

## Distinct gene symbols that pass both filters
CML_keygenes_list <- unique(
  CML_keygenes_df$Symbol[is_protein_coding & is_disease_inferred]
)
CML_keygenes_list
##   [1] "ABL1"      "BCR"       "NRAS"      "KRAS"      "SF3B1"     "SETBP1"   
##   [7] "RUNX1"     "CSF3R"     "BRAF"      "IFNA1"     "KIT"       "CRKL"     
##  [13] "JAK2"      "PDGFRB"    "IL3"       "STAT5A"    "CSF3"      "FLT3"     
##  [19] "CD34"      "LYN"       "CSF2"      "KITLG"     "IRF8"      "PDGFRA"   
##  [25] "WT1"       "IFNA2"     "SRC"       "SLC22A1"   "HCK"       "MCL1"     
##  [31] "ETV6"      "MYC"       "ABCB1"     "CASP3"     "BCL2"      "BCL2L1"   
##  [37] "NUP98"     "CD33"      "MPL"       "PRAME"     "DNTT"      "STAT5B"   
##  [43] "ANXA5"     "DOK1"      "GATA2"     "GRB2"      "MECOM"     "HSP90AA1" 
##  [49] "HRAS"      "PRTN3"     "MYB"       "THPO"      "HOXA9"     "GAB2"     
##  [55] "AXL"       "U2AF1"     "PTK2B"     "EPO"       "CASP9"     "FGFR1"    
##  [61] "GATA1"     "CD177"     "TKT"       "MPO"       "PCBP2"     "CBL"      
##  [67] "BACH2"     "STAT1"     "BIRC5"     "ABCC1"     "NKG7"      "RASA1"    
##  [73] "IKZF1"     "RARA"      "CDKN1A"    "DNMT1"     "CDKN2B"    "MEIS1"    
##  [79] "KMT2A"     "KIR3DL1"   "FOXO3"     "CEACAM8"   "EIF4EBP1"  "NTRK3"    
##  [85] "PIK3CG"    "SIPA1"     "HSPA4"     "SOCS1"     "CRK"       "BMI1"     
##  [91] "CCND2"     "H2AC18"    "TCN1"      "MAPK8"     "GAPDH"     "CDK2"     
##  [97] "JUNB"      "MAPK9"     "MAPK1"     "FCGR3B"    "PTPN11"    "NF1"      
## [103] "PXN"       "HDAC9"     "FN1"       "CSF2RA"    "HOXA10"    "CCL3"     
## [109] "KLRK1"     "IL11"      "FES"       "CEBPB"     "MNDA"      "TNFSF10"  
## [115] "MAPK14"    "PTPRC"     "RAF1"      "EVPL"      "TAL1"      "ELANE"    
## [121] "APAF1"     "EPOR"      "NTRK1"     "CTLA4"     "CBFB"      "GZMB"     
## [127] "HSPA8"     "NME1"      "G6PD"      "ASS1"      "TNFRSF10B" "H4C16"    
## [133] "SOCS2"     "EIF4E"     "PLK1"      "ITGB3"     "PTK2"      "ADA"      
## [139] "SHC1"      "AKT1"      "CCNA2"     "TP53"      "INPP5D"    "EGFR"     
## [145] "NFKBIA"    "CREB1"     "PML"       "TNFRSF10A" "SOCS3"     "CDC25A"   
## [151] "SELP"      "IRF4"      "LEF1"      "CREBBP"    "KDM4C"     "ERBB2"    
## [157] "BIRC3"     "CD38"      "PTPN6"     "CCR7"      "PRKCD"     "ENG"      
## [163] "ALK"       "STAT3"     "PTPN1"     "SIRPA"     "NPM1"      "CTNNB1"   
## [169] "ICOSLG"    "ASXL1"     "CCR6"      "CD19"      "CD8A"      "LUC7L2"   
## [175] "LCK"       "VEGFA"     "CCND1"     "PTEN"      "TET2"      "ITGB1"    
## [181] "IRF2"      "PRODH"     "BCL2L11"   "ABCG2"     "ABL2"      "MLLT3"    
## [187] "CDKN1C"    "CD4"       "NOTCH1"    "IL2"       "CALR"      "IL6"      
## [193] "ACTB"      "CEBPA"     "MTOR"      "GOLGA4"    "HLA-A"     "JAK1"     
## [199] "DNMT3A"

❓Time for Question 1 & 2

Try and edit the code and answer quiz questions 1 and 2 on Slido.

Slido.com: # 1675300

We need to look up the Ensembl protein IDs of our genes by their gene names. Note that some gene names may not have a corresponding protein ID. This might be because the interactome uses a different alias for the gene, or because the gene is not recorded at all.

## Restrict the alias table to entries whose source is KEGG_NAME
from_kegg_name <- alias_df$source == "KEGG_NAME"
alias_df_1 <- alias_df[from_kegg_name, ]

## Rows whose alias is one of our CML genes, and the distinct protein IDs
## those rows refer to
is_cml_alias <- alias_df_1$alias %in% CML_keygenes_list
keygenes_Ensembl <- alias_df_1[is_cml_alias, ]
keygenes_Ensembl_list <- unique(keygenes_Ensembl$protein_name)

4. Extract subnetwork

To extract the CML subnetwork, we keep only the interactions in which both proteins come from our genes of interest.

## An interaction is kept only when BOTH partners are CML key proteins
both_in_cml <- interactome_df$protein1 %in% keygenes_Ensembl_list &
  interactome_df$protein2 %in% keygenes_Ensembl_list
CML_network <- interactome_df[both_in_cml, ]

rmarkdown::paged_table(CML_network) ## dont run this
# run this:
# CML_network

We now have the list of interactions, which is huge (4,232 interactions). Not all of them are equally reliable, though. STRING provides a separate column, “combined_score”, which denotes how confident STRING is in each interaction. It reflects the evidence behind the interaction, for example whether it is merely predicted from co-mentions in the literature or has been tested experimentally.

A middle, and widely accepted cut-off is 400. We want to aim for a higher confidence at 800. This can be done by:

## Keep only interactions at or above the high-confidence cut-off of 800
high_confidence <- CML_network$combined_score >= 800
CML_network <- CML_network[high_confidence, ]

rmarkdown::paged_table(CML_network) ## dont run this
# run this:
# CML_network

❓Time for Question 3

Try and answer quiz question 3 on Slido!

Now we have a protein network but the Ensembl Protein IDs are confusing. We can add the alias name from the keygenes_Ensembl that we have generated.

## Attach the gene alias of each interaction partner to the edge list.
## merge() moves its key column to the front of the result, so columns are
## renamed BY NAME and reordered explicitly at the end; renaming by position
## after the second merge would swap the protein1/protein2 ID labels.
alias_lookup <- keygenes_Ensembl[c("protein_name", "alias")]

## Alias of the first partner
CML_network <- merge(CML_network,
                     alias_lookup,
                     by.x = "protein1",
                     by.y = "protein_name",
                     all.x = TRUE)
names(CML_network)[names(CML_network) == "alias"] <- "protein1_alias"

## Alias of the second partner
CML_network <- merge(CML_network,
                     alias_lookup,
                     by.x = "protein2",
                     by.y = "protein_name",
                     all.x = TRUE)
names(CML_network)[names(CML_network) == "alias"] <- "protein2_alias"

## Restore a fixed, readable column order
CML_network <- CML_network[c("protein1", "protein2", "combined_score",
                             "protein1_alias", "protein2_alias")]

rmarkdown::paged_table(CML_network) ## dont run this
# run this:
# CML_network

Save network

To ensure we can retrieve the same result in each run, saving the network would be helpful. This also helps to feed the result to other pipelines.

## Write the edge list to disk (without row names), then load it back so the
## following steps work from the saved copy
write.csv(x = CML_network, file = "CML_network.csv", row.names = FALSE)
CML_network <- read.csv(file = "CML_network.csv")

rmarkdown::paged_table(CML_network) ## dont run this
# run this:
# CML_network

5. Network as network

5.1 Converting to network

Up until now, we have been subsetting the network as a data table (a.k.a. an edge list, as the table stores the edges of the network). To run any network algorithm, we need to read it in as a network (graph) object. The package we are using is igraph, which you should have loaded at the start of the tutorial.

## Build an undirected igraph object from the two alias columns; no separate
## vertex table is supplied, so vertices come from the names in the edge list.
## NOTE(review): every degree printed in section 6.1 is even, which suggests
## each interaction is listed in both directions and becomes two parallel
## edges here - confirm, and consider simplify(g) if that is unwanted.
edge_list <- CML_network[, c("protein1_alias", "protein2_alias")]
g <- graph_from_data_frame(edge_list, directed = FALSE)

5.2 Visualise the graph

To visualise the graph, we use the plot function

par(mar = c(0, 0, 0, 0))   # no margins: let the network fill the plot area
## first attempt: small white vertices, translucent grey edges
plot(g,
     vertex.size = 2,
     vertex.color = 'white',
     vertex.label.cex = 0.7,
     edge.color = rgb(0.5, 0.5, 0.5, alpha = 0.2))

The nodes are all clustered together. The best way to overcome this is to adjust the layout.

par(mar = c(2, 2, 2, 2))   # margin of 2 lines on every side
## compute vertex positions with the "dh" layout, passing an edge-length
## weight derived from the graph's edge density
lay <- layout_(g, with_dh(weight.edge.lengths = edge_density(g) / 1000))

## same styling as before, but drawn with the computed layout
plot(g,
     layout = lay,
     vertex.size = 2,
     vertex.color = 'white',
     vertex.label.cex = 0.7,
     edge.color = rgb(0.5, 0.5, 0.5, alpha = 0.2))

❓Time for Question 4

Try and answer quiz question 4 on Slido!

6. Centrality measures

In the previous lectures, we learnt about several centrality scores. The algorithms measure the topology of the network and give us some insights on the node or edge importances. In this exercise, we use the CML network and calculate some centralities we have described.

6.1 Degree centrality

Degree centrality measures the number of edges connected to the nodes. We use the “degree()” function from igraph to calculate the degree centrality of the CML network.

## Degree of every vertex in g (v defaults to all vertices); mode = "all"
## counts every edge attached to a vertex
degree_centrality <- degree(g, mode = "all")

## Print the scores from highest to lowest
sort(degree_centrality, decreasing = TRUE, na.last = TRUE)
##      GRB2       SRC      JAK2      EGFR      SHC1    PTPN11     STAT3      JAK1 
##        58        52        46        46        42        42        38        34 
##      TP53  HSP90AA1       LYN       CBL       CRK     RUNX1      CRKL      PTK2 
##        32        32        30        30        24        24        24        24 
##    CREBBP      ABL1    STAT5A      EPOR       LCK    STAT5B      HRAS     STAT1 
##        22        22        20        20        18        18        18        18 
##     ERBB2    PDGFRA    CTNNB1      KRAS      CDK2    CDKN1A     MAPK1     FOXO3 
##        18        16        16        14        14        14        14        14 
##     PTK2B       FN1     HSPA4      RAF1     HSPA8    BCL2L1    PDGFRB     CCND1 
##        14        14        14        12        12        12        12        12 
##     APAF1     ITGB3     PTPN6      GAB2     SOCS3       IL3       IL6     RASA1 
##        12        12        12        10        10        10        10        10 
##      CSF3      NRAS       HCK      BCL2     ITGB1     SOCS2       EPO     CSF3R 
##        10        10        10        10        10         8         8         8 
##     CCND2    CDKN1C      BRAF       KIT     CEBPA     CCNA2     GATA1       IL2 
##         8         8         8         8         8         8         8         8 
##     SOCS1      ACTB     NTRK1      CD8A     PRTN3      THPO      CSF2     CREB1 
##         8         8         8         6         6         6         6         6 
##      LEF1     CASP3     DNMT1     CEBPB       MYC      NPM1     PTPN1     KITLG 
##         6         6         6         6         6         6         6         6 
##     GATA2       ALK      TAL1     MAPK8     MAPK9      AKT1      CALR     HLA-A 
##         6         6         6         6         6         6         6         6 
##   BCL2L11    PIK3CG     NTRK3      CD19     SIRPA    INPP5D     CTLA4   TNFSF10 
##         6         6         6         6         6         6         6         4 
## TNFRSF10B     ELANE       MPO TNFRSF10A      CCL3       PML       NF1    FCGR3B 
##         4         4         4         4         4         4         4         4 
##     KMT2A     CASP9      MTOR    DNMT3A      GZMB       AXL      MCL1       MPL 
##         4         4         4         4         4         4         4         4 
##      FLT3       CD4    MAPK14     CD177      TCN1      IRF4      RARA      PLK1 
##         4         4         2         2         2         2         2         2 
##     HOXA9      DOK1      CBFB     MECOM     BIRC3      PTEN     MEIS1      CD34 
##         2         2         2         2         2         2         2         2 
##  EIF4EBP1       BCR      ABL2      BMI1     ASXL1     IKZF1   KIR3DL1      CD33 
##         2         2         2         2         2         2         2         2 
##     GAPDH     PTPRC      CCR6      CCR7    ICOSLG     IFNA1 
##         2         2         2         2         2         2

Visualise network by centrality scores

We can visualise the degree centrality on the network plot itself.

par(mar = rep(0, 4))   # no margins around the plot
## define colour
## The palette is indexed by degree below, so it needs at least max(degree)
## entries. Parallel edges can push a degree above the number of vertices, in
## which case a palette sized only by the vertex count would give NA colours.
X <- length(V(g))
vertex_degree <- degree(g)
colors <- heat.colors(max(X, vertex_degree), rev = TRUE)

plot(g, 
     #layout = lay,
      vertex.color = colors[vertex_degree], 
      vertex.size = 10,
      vertex.label.cex = 0.5,
      vertex.label.color = "black",
      bg = ""
      )

6.2 Closeness centrality

Closeness centrality describes how close a node is to the other nodes: the shorter its distances to them, the higher its score. We use the “closeness()” function from igraph to calculate the closeness centrality of the CML network.

## Closeness of every vertex, with igraph's default settings
closeness_centrality <- closeness(graph = g)

## Print the scores from highest to lowest
sort(closeness_centrality, decreasing = TRUE, na.last = TRUE)
##       HOXA9       MEIS1        BMI1       ASXL1     TNFSF10   TNFRSF10B 
## 1.000000000 1.000000000 1.000000000 1.000000000 0.500000000 0.500000000 
##   TNFRSF10A        CCL3       PRTN3        CCR6        CCR7       ELANE 
## 0.500000000 0.500000000 0.333333333 0.333333333 0.333333333 0.250000000 
##         MPO       CD177         SRC       STAT3        EGFR        GRB2 
## 0.250000000 0.200000000 0.003787879 0.003521127 0.003521127 0.003436426 
##      PTPN11    HSP90AA1        SHC1        JAK2         LYN       ERBB2 
## 0.003367003 0.003322259 0.003289474 0.003267974 0.003174603 0.003144654 
##        JAK1        TP53      CREBBP         CBL        PTK2      STAT5A 
## 0.003125000 0.003105590 0.003067485 0.003067485 0.003058104 0.003030303 
##       MAPK1        ABL1         LCK         CRK       STAT1       RUNX1 
## 0.003012048 0.003012048 0.002994012 0.002967359 0.002958580 0.002958580 
##      STAT5B      PDGFRA      CTNNB1         HCK        CRKL      PDGFRB 
## 0.002949853 0.002941176 0.002932551 0.002932551 0.002890173 0.002873563 
##       PTK2B        GAB2        HRAS       FOXO3        AKT1        NPM1 
## 0.002873563 0.002808989 0.002785515 0.002777778 0.002777778 0.002770083 
##       ITGB3         FN1         AXL        RAF1        EPOR       PTPN6 
## 0.002754821 0.002739726 0.002732240 0.002724796 0.002724796 0.002666667 
##       HSPA4       PTPN1       CEBPB       NTRK1         IL6       DNMT1 
## 0.002659574 0.002645503 0.002638522 0.002617801 0.002590674 0.002590674 
##       NTRK3       RASA1       ITGB1         ALK       SIRPA         KIT 
## 0.002590674 0.002570694 0.002557545 0.002551020 0.002538071 0.002531646 
##       HSPA8      CDKN1A      INPP5D        CD19         MYC        BRAF 
## 0.002518892 0.002518892 0.002512563 0.002493766 0.002463054 0.002457002 
##       APAF1       CSF3R        ACTB        IRF4        BCL2        FLT3 
## 0.002457002 0.002444988 0.002444988 0.002439024 0.002439024 0.002421308 
##       CREB1      PIK3CG        LEF1        KRAS        CALR       CTLA4 
## 0.002403846 0.002403846 0.002392344 0.002386635 0.002386635 0.002380952 
##         IL3      BCL2L1       CCND1       SOCS3        CSF3       SOCS1 
## 0.002364066 0.002364066 0.002364066 0.002358491 0.002352941 0.002352941 
##        MTOR         EPO       CEBPA        GZMB         MPL         IL2 
## 0.002352941 0.002347418 0.002331002 0.002331002 0.002325581 0.002288330 
##       CCNA2       IFNA1        PLK1       KMT2A        CD8A        NRAS 
## 0.002277904 0.002242152 0.002232143 0.002217295 0.002212389 0.002212389 
##        TCN1        PTEN       MAPK8       MAPK9         CD4      MAPK14 
## 0.002212389 0.002207506 0.002192982 0.002192982 0.002192982 0.002183406 
##         BCR        ABL2        CDK2       GATA1       PTPRC       GATA2 
## 0.002183406 0.002183406 0.002178649 0.002173913 0.002173913 0.002169197 
##        TAL1         PML        CBFB       MECOM       KITLG         NF1 
## 0.002169197 0.002164502 0.002155172 0.002155172 0.002100840 0.002083333 
##       SOCS2      DNMT3A        CD34       CCND2        DOK1       HLA-A 
## 0.002049180 0.002044990 0.002036660 0.001964637 0.001941748 0.001934236 
##      CDKN1C       GAPDH      FCGR3B       CASP3       CASP9     BCL2L11 
## 0.001915709 0.001912046 0.001904762 0.001886792 0.001883239 0.001879699 
##        CSF2        THPO      ICOSLG        MCL1    EIF4EBP1       IKZF1 
## 0.001851852 0.001831502 0.001831502 0.001824818 0.001814882 0.001706485 
##        RARA     KIR3DL1        CD33       BIRC3 
## 0.001700680 0.001555210 0.001536098 0.001524390

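We see that HOXA9, MEIS1, BMI1 and ASXL1 are jointly at the top with a score of 1. They belong to small components that are disconnected from the main network, so their closeness is presumably computed only over the few nodes they can reach.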

❓Time for Question 5

Try and answer quiz question 5 on Slido!

6.3 Betweenness centrality

Betweenness centrality describes how often a node lies on the shortest paths between other nodes, i.e. how much it acts as a bridge in the network. We use the “betweenness()” function from igraph to calculate the betweenness centrality of the CML network.

## Betweenness of every vertex, with igraph's default settings
betweenness_centrality <- betweenness(graph = g)

## Print the scores from highest to lowest
sort(betweenness_centrality, decreasing = TRUE, na.last = TRUE)
##          SRC     HSP90AA1         GRB2        RUNX1        STAT3         TP53 
## 1657.0074563 1453.6903142 1050.4800924 1034.5827514 1030.8493079  959.3456120 
##         JAK2         EGFR       PTPN11       CREBBP          LCK         SHC1 
##  938.9748795  747.2673912  672.8506374  480.8040049  465.0660305  462.0491129 
##         ABL1         JAK1        APAF1          LYN         HRAS        MAPK1 
##  428.3986606  414.7631181  407.2044758  406.9505766  290.1139735  258.4301585 
##         PTK2         CD19          CBL        FOXO3       BCL2L1        RASA1 
##  253.2915905  250.5836441  237.0842378  224.0694477  205.6064147  163.8690773 
##        HSPA8          CRK          FN1         CALR       CDKN1A        HLA-A 
##  163.6236726  161.1206687  154.6942667  152.1455128  147.6236060  134.9464286 
##         CRKL         EPOR        ERBB2         BCL2          PML       FCGR3B 
##  133.6058462  133.4761884  130.2126259  129.6477371  126.0000000  126.0000000 
##        CASP3        GATA1         MTOR        CTLA4       CTNNB1        CCND1 
##  126.0000000  126.0000000  126.0000000  126.0000000  118.4693804  111.7086459 
##         CD8A       STAT5A        HSPA4         CSF3         NPM1          IL3 
##  103.9259158   99.1687605   92.2768404   86.2796157   76.5131758   75.8938316 
##        DNMT1         KRAS         RAF1        NTRK1        CCNA2        STAT1 
##   75.4817433   65.4964644   61.5551469   58.8162393   53.6811661   52.3333594 
##          MYC        ITGB3          EPO        CEBPB         CDK2          KIT 
##   49.5182567   49.4247666   48.3155878   46.8658560   43.3722410   41.6476199 
##         BRAF        PTPN6       PDGFRB          MPL          IL6          IL2 
##   40.9770998   38.9381370   36.2654259   35.7033616   32.3549302   31.7377351 
##       PIK3CG         ACTB        CEBPA         GAB2       STAT5B        ITGB1 
##   29.6522564   28.5243416   23.8592720   23.8188312   21.6380912   18.0568941 
##         FLT3       PDGFRA        CREB1        SOCS3        SOCS1         NRAS 
##   17.9818352   16.1279408   13.2530525   12.4693962   12.2193962    8.9278250 
##        CSF3R          HCK        KITLG        PTK2B         AKT1         LEF1 
##    8.8476558    8.2633700    7.5462185    6.5235160    5.5595410    4.8712121 
##        SOCS2         CSF2        CCND2      BCL2L11        PRTN3       CDKN1C 
##    3.7293040    3.7000000    2.7435953    2.2750073    2.0000000    1.6167249 
##         THPO          ALK         CCL3       INPP5D       DNMT3A       MAPK14 
##    1.3333333    1.2269231    1.0000000    0.5836441    0.5000000    0.0000000 
##      TNFSF10    TNFRSF10B        ELANE        CD177          MPO    TNFRSF10A 
##    0.0000000    0.0000000    0.0000000    0.0000000    0.0000000    0.0000000 
##          NF1        KMT2A         TCN1         IRF4         RARA         PLK1 
##    0.0000000    0.0000000    0.0000000    0.0000000    0.0000000    0.0000000 
##        HOXA9         DOK1        PTPN1        GATA2         TAL1         CBFB 
##    0.0000000    0.0000000    0.0000000    0.0000000    0.0000000    0.0000000 
##        MECOM        MAPK8        MAPK9        BIRC3        CASP9         PTEN 
##    0.0000000    0.0000000    0.0000000    0.0000000    0.0000000    0.0000000 
##        MEIS1         CD34     EIF4EBP1         GZMB          BCR        NTRK3 
##    0.0000000    0.0000000    0.0000000    0.0000000    0.0000000    0.0000000 
##         ABL2          AXL         BMI1         MCL1        ASXL1        IKZF1 
##    0.0000000    0.0000000    0.0000000    0.0000000    0.0000000    0.0000000 
##        SIRPA      KIR3DL1          CD4         CD33        GAPDH        PTPRC 
##    0.0000000    0.0000000    0.0000000    0.0000000    0.0000000    0.0000000 
##         CCR6         CCR7       ICOSLG        IFNA1 
##    0.0000000    0.0000000    0.0000000    0.0000000

❓Time for Question 6

Try and answer quiz question 6 on Slido!

6.4 Eigenvector centrality

Eigenvector centrality describes how much influence a node has in the network: a node scores highly when it is connected to other high-scoring nodes. We use the “evcent()” function from igraph to calculate the eigenvector centrality of the CML network.

You might notice that the algorithm returns a list rather than a plain vector. To overcome this, we extract the actual result, which is stored in the element named “vector”.

## eigen_centrality() is the current igraph name for the older evcent().
## It returns a list; the per-vertex scores are in its "vector" element.
eigenvector_centrality <- eigen_centrality(g)
eigenvector_centrality <- eigenvector_centrality$vector

## Print the scores from highest to lowest
eigenvector_centrality[order(eigenvector_centrality,
      decreasing = TRUE)]
##         GRB2         EGFR          SRC       PTPN11         SHC1         JAK2 
## 1.000000e+00 9.702885e-01 9.504359e-01 8.954394e-01 8.336153e-01 8.332011e-01 
##        STAT3         JAK1          CBL          CRK          LYN         PTK2 
## 6.944604e-01 6.441682e-01 6.359699e-01 5.537367e-01 5.475639e-01 5.415599e-01 
##         CRKL       STAT5A        ERBB2       STAT5B        STAT1       PDGFRA 
## 5.165171e-01 4.998364e-01 4.904466e-01 4.823432e-01 4.633565e-01 4.523738e-01 
##         EPOR        PTK2B       PDGFRB         ABL1     HSP90AA1          LCK 
## 4.214892e-01 3.854504e-01 3.565130e-01 3.423456e-01 3.416127e-01 3.403185e-01 
##       CTNNB1         GAB2        PTPN6          HCK          FN1       CREBBP 
## 3.091495e-01 3.063666e-01 2.963099e-01 2.733834e-01 2.483516e-01 2.328758e-01 
##        MAPK1        ITGB3        PTPN1          IL6        RUNX1       INPP5D 
## 2.303543e-01 2.278371e-01 2.192058e-01 2.130447e-01 1.984770e-01 1.933983e-01 
##          KIT        NTRK1        SIRPA        NTRK3         TP53        CSF3R 
## 1.803556e-01 1.797406e-01 1.780129e-01 1.767306e-01 1.749920e-01 1.721225e-01 
##        SOCS3        ITGB1          AXL          ALK        SOCS1         CD19 
## 1.709342e-01 1.667085e-01 1.584135e-01 1.577168e-01 1.392726e-01 1.265327e-01 
##        FOXO3        RASA1         RAF1         HRAS         AKT1          EPO 
## 1.238378e-01 1.209598e-01 1.162747e-01 1.155240e-01 1.132254e-01 1.113789e-01 
##         NPM1        HSPA4        CTLA4          IL3        MAPK8        MAPK9 
## 1.082420e-01 1.020172e-01 9.743609e-02 9.619855e-02 9.460969e-02 9.460969e-02 
##         CSF3         GZMB         FLT3          IL2        CEBPB        DNMT1 
## 9.429234e-02 9.258882e-02 8.661765e-02 7.921479e-02 7.461697e-02 7.136908e-02 
##          MPL        SOCS2         ACTB         LEF1        HSPA8       PIK3CG 
## 6.948733e-02 6.646307e-02 6.452851e-02 6.014326e-02 5.833651e-02 5.687257e-02 
##         IRF4        IFNA1         TCN1         BRAF         KRAS       CDKN1A 
## 5.640376e-02 5.231905e-02 5.165319e-02 5.044678e-02 4.770467e-02 4.642288e-02 
##         PTEN          MYC        APAF1         NRAS        KITLG         CALR 
## 4.398526e-02 4.271095e-02 4.097725e-02 3.714461e-02 3.706006e-02 3.647639e-02 
##        CREB1         CD8A        CCND1          CD4         BCL2         MTOR 
## 3.489321e-02 3.053184e-02 3.036849e-02 3.012030e-02 2.927087e-02 2.792988e-02 
##          BCR         ABL2        PTPRC        CEBPA       BCL2L1         THPO 
## 2.780516e-02 2.780516e-02 2.764052e-02 2.576963e-02 2.435270e-02 2.234825e-02 
##         CSF2        KMT2A         CDK2        CCNA2         CD34        GATA1 
## 2.190537e-02 2.174808e-02 2.125170e-02 2.023933e-02 2.017101e-02 1.937629e-02 
##        GATA2         TAL1       MAPK14          PML         CBFB        MECOM 
## 1.925808e-02 1.925808e-02 1.870927e-02 1.622726e-02 1.612021e-02 1.612021e-02 
##         PLK1          NF1       FCGR3B         DOK1       DNMT3A        CCND2 
## 1.421277e-02 1.325736e-02 1.034517e-02 9.824303e-03 9.265529e-03 8.493069e-03 
##       ICOSLG       CDKN1C        HLA-A        GAPDH      BCL2L11        CASP3 
## 7.913716e-03 6.526201e-03 5.478518e-03 4.738066e-03 4.545915e-03 3.646575e-03 
##        CASP9         MCL1     EIF4EBP1        IKZF1         RARA         CD33 
## 3.624327e-03 2.347132e-03 2.268452e-03 1.573734e-03 1.317970e-03 8.402301e-04 
##      KIR3DL1        BIRC3         CCR6      TNFSF10    TNFRSF10B        PRTN3 
## 4.449628e-04 2.961732e-04 2.570869e-18 0.000000e+00 0.000000e+00 0.000000e+00 
##        ELANE        CD177          MPO    TNFRSF10A         CCL3        HOXA9 
## 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 
##        MEIS1         BMI1        ASXL1         CCR7 
## 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00

❓Time for Question 7

Try and answer quiz question 7 on Slido!

6.5 Random Walk Restart/ Personalised PageRank

We extracted the interactome from a non-disease database, therefore the edges may only be applicable to the normal (healthy) setting. We are interested in disease-associated interactions. One way to find them is to seed the RWR from the dysregulated proteins, in our case BCR and ABL1 (as there is no single node called BCR/ABL1).

Restarting from ABL1

## Restart vector: 1 for the seed vertex ABL1, 0 for every other vertex
ABL1_PPR_vector <- as.numeric(V(g)$name == "ABL1")
ABL1_PPR_vector
##   [1] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
##  [38] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
##  [75] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0
## [112] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## Personalised PageRank seeded with the ABL1 restart vector; keep only the
## per-vertex scores stored in the "vector" element of the result
ABL1_PPR <- page_rank(g, personalized = ABL1_PPR_vector)$vector

## Print the scores from highest to lowest
sort(ABL1_PPR, decreasing = TRUE, na.last = TRUE)
##         ABL1         GRB2         SHC1          CRK         CRKL          SRC 
## 0.2003673067 0.0528574566 0.0458749224 0.0307561526 0.0302908107 0.0267461299 
##     HSP90AA1         EGFR          FN1       CTNNB1          CBL        NTRK1 
## 0.0267120163 0.0260221126 0.0232360835 0.0220142712 0.0215505564 0.0206331527 
##       PTPN11        NTRK3         JAK2        STAT3         PTK2          BCR 
## 0.0197411646 0.0188890406 0.0169055096 0.0159265991 0.0156832102 0.0154829282 
##         ABL2          LYN         JAK1        ERBB2         TP53        PTK2B 
## 0.0154829282 0.0131186963 0.0119736010 0.0117193851 0.0099586914 0.0098233079 
##       PDGFRA        ITGB3       CREBBP       STAT5A         EPOR       STAT5B 
## 0.0088603083 0.0088329827 0.0086244468 0.0086080212 0.0083537674 0.0079248138 
##          LCK        ITGB1        STAT1        RUNX1         GAB2        KITLG 
## 0.0072409179 0.0071066444 0.0067846673 0.0063780105 0.0063257452 0.0061556895 
##        MAPK8        MAPK9        FOXO3        HSPA4       PDGFRB        MAPK1 
## 0.0060337115 0.0060337115 0.0059397254 0.0057552829 0.0056277542 0.0053573364 
##          KIT        PTPN6        HSPA8         ACTB          ALK         HRAS 
## 0.0052579733 0.0050004369 0.0045511694 0.0045232936 0.0043436717 0.0042315113 
##       INPP5D       CDKN1A          IL3          HCK         LEF1        APAF1 
## 0.0041495052 0.0039655497 0.0038460310 0.0036389245 0.0034572266 0.0034080076 
##          IL6         NPM1         RAF1         KRAS        SIRPA         AKT1 
## 0.0033173542 0.0033090327 0.0032292599 0.0030808910 0.0030567126 0.0029923244 
##         CDK2        SOCS3        RASA1         GZMB         CD19         CD34 
## 0.0029730912 0.0029684313 0.0029526445 0.0029322188 0.0028252833 0.0028215244 
##        CCND1         BRAF        SOCS2         CALR          MYC          AXL 
## 0.0027878523 0.0026503053 0.0025914169 0.0025586309 0.0024540705 0.0024236630 
##        PTPN1        CSF3R         CSF3        SOCS1          IL2         MTOR 
## 0.0023855027 0.0023692351 0.0023670203 0.0022787583 0.0022512622 0.0022216452 
##       BCL2L1         NRAS         FLT3          EPO        CTLA4         BCL2 
## 0.0022151246 0.0021611337 0.0020999464 0.0020958472 0.0019856540 0.0019776525 
##        CEBPA        DNMT1        CREB1        CCNA2         CD8A        HLA-A 
## 0.0018104911 0.0017475004 0.0016811650 0.0016777093 0.0016089506 0.0015554091 
##        CEBPB        CCND2         CSF2       PIK3CG       CDKN1C         THPO 
## 0.0015490109 0.0015435996 0.0015346119 0.0015104282 0.0014404921 0.0012656990 
##       FCGR3B        GATA1         TCN1       DNMT3A        KMT2A          CD4 
## 0.0012532242 0.0012420229 0.0012211982 0.0011904451 0.0011427646 0.0011397338 
##         PTEN        CASP3        GATA2         TAL1          MPL     EIF4EBP1 
## 0.0011108941 0.0010770905 0.0009986590 0.0009986590 0.0009833835 0.0009441992 
##      BCL2L11        CASP9          NF1         IRF4          PML        PTPRC 
## 0.0008906253 0.0007879767 0.0007737509 0.0007125058 0.0007072810 0.0006838645 
##       MAPK14        GAPDH        IFNA1         MCL1       ICOSLG         CD33 
## 0.0006505337 0.0006447490 0.0005986800 0.0005661532 0.0005626020 0.0005326203 
##         PLK1         DOK1         CBFB        MECOM      KIR3DL1        BIRC3 
## 0.0005290555 0.0005019496 0.0004517757 0.0004517757 0.0004406992 0.0003051756 
##         RARA        IKZF1      TNFSF10    TNFRSF10B        PRTN3        ELANE 
## 0.0003005944 0.0002639299 0.0000000000 0.0000000000 0.0000000000 0.0000000000 
##        CD177          MPO    TNFRSF10A         CCL3        HOXA9        MEIS1 
## 0.0000000000 0.0000000000 0.0000000000 0.0000000000 0.0000000000 0.0000000000 
##         BMI1        ASXL1         CCR6         CCR7 
## 0.0000000000 0.0000000000 0.0000000000 0.0000000000

Repeat the same analysis for BCR

What would the code be for starting at “BCR” instead of “ABL1”?

❓Time for Question 8

Try and answer quiz question 8 on Slido!

Repeat the same analysis for BCR and ABL1

## Restart vector: 1 for the two seed vertices BCR and ABL1, 0 elsewhere
CML_PPR_vector <- as.numeric(V(g)$name %in% c("BCR", "ABL1"))

## Personalised PageRank seeded with both proteins; keep only the per-vertex
## scores stored in the "vector" element of the result
CML_PPR <- page_rank(g, personalized = CML_PPR_vector)$vector

## Print the scores from highest to lowest
sort(CML_PPR, decreasing = TRUE, na.last = TRUE)
##         ABL1          BCR         GRB2         SHC1          CRK         CRKL 
## 0.1853397587 0.0893217086 0.0488931473 0.0424343033 0.0284494412 0.0280189999 
##          SRC     HSP90AA1         EGFR          FN1       CTNNB1          CBL 
## 0.0247401701 0.0247086150 0.0240704541 0.0214933772 0.0203632008 0.0199342646 
##        NTRK1       PTPN11        NTRK3         JAK2        STAT3         PTK2 
## 0.0190856662 0.0182605773 0.0174723626 0.0156375964 0.0147321042 0.0145069695 
##         ABL2          LYN         JAK1        ERBB2         TP53        PTK2B 
## 0.0143217086 0.0121347941 0.0110755809 0.0108404313 0.0092117896 0.0090865598 
##       PDGFRA        ITGB3       CREBBP       STAT5A         EPOR       STAT5B 
## 0.0081957851 0.0081705090 0.0079776133 0.0079624196 0.0077272349 0.0073304528 
##          LCK        ITGB1        STAT1        RUNX1         GAB2        KITLG 
## 0.0066978490 0.0065736460 0.0062758172 0.0058996597 0.0058513143 0.0056940128 
##        MAPK8        MAPK9        FOXO3        HSPA4       PDGFRB        MAPK1 
## 0.0055811831 0.0055811831 0.0054942460 0.0053236367 0.0052056727 0.0049555361 
##          KIT        PTPN6        HSPA8         ACTB          ALK         HRAS 
## 0.0048636253 0.0046254041 0.0042098317 0.0041840466 0.0040178963 0.0039141479 
##       INPP5D       CDKN1A          IL3          HCK         LEF1        APAF1 
## 0.0038382923 0.0036681335 0.0035575787 0.0033660052 0.0031979346 0.0031524070 
##          IL6         NPM1         RAF1         KRAS        SIRPA         AKT1 
## 0.0030685526 0.0030608553 0.0029870654 0.0028498242 0.0028274592 0.0027679000 
##         CDK2        SOCS3        RASA1         GZMB         CD19         CD34 
## 0.0027501094 0.0027457990 0.0027311962 0.0027123024 0.0026133871 0.0026099101 
##        CCND1         BRAF        SOCS2         CALR          MYC          AXL 
## 0.0025787633 0.0024515324 0.0023970606 0.0023667336 0.0022700152 0.0022418883 
##        PTPN1        CSF3R         CSF3        SOCS1          IL2         MTOR 
## 0.0022065900 0.0021915425 0.0021894937 0.0021078514 0.0020824176 0.0020550218 
##       BCL2L1         NRAS         FLT3          EPO        CTLA4         BCL2 
## 0.0020489903 0.0019990486 0.0019424504 0.0019386587 0.0018367300 0.0018293285 
##        CEBPA        DNMT1        CREB1        CCNA2         CD8A        HLA-A 
## 0.0016747043 0.0016164379 0.0015550776 0.0015518811 0.0014882793 0.0014387534 
##        CEBPB        CCND2         CSF2       PIK3CG       CDKN1C         THPO 
## 0.0014328350 0.0014278296 0.0014195160 0.0013971461 0.0013324552 0.0011707716 
##       FCGR3B        GATA1         TCN1       DNMT3A        KMT2A          CD4 
## 0.0011592324 0.0011488712 0.0011296083 0.0011011617 0.0010570573 0.0010542538 
##         PTEN        CASP3        GATA2         TAL1          MPL     EIF4EBP1 
## 0.0010275770 0.0009963087 0.0009237596 0.0009237596 0.0009096298 0.0008733843 
##      BCL2L11        CASP9          NF1         IRF4          PML        PTPRC 
## 0.0008238284 0.0007288785 0.0007157196 0.0006590678 0.0006542349 0.0006325746 
##       MAPK14        GAPDH        IFNA1         MCL1       ICOSLG         CD33 
## 0.0006017437 0.0005963928 0.0005537790 0.0005236917 0.0005204068 0.0004926738 
##         PLK1         DOK1         CBFB        MECOM      KIR3DL1        BIRC3 
## 0.0004893763 0.0004643034 0.0004178926 0.0004178926 0.0004076468 0.0002822875 
##         RARA        IKZF1      TNFSF10    TNFRSF10B        PRTN3        ELANE 
## 0.0002780498 0.0002441351 0.0000000000 0.0000000000 0.0000000000 0.0000000000 
##        CD177          MPO    TNFRSF10A         CCL3        HOXA9        MEIS1 
## 0.0000000000 0.0000000000 0.0000000000 0.0000000000 0.0000000000 0.0000000000 
##         BMI1        ASXL1         CCR6         CCR7 
## 0.0000000000 0.0000000000 0.0000000000 0.0000000000

❓Time for Question 9

Try and answer quiz question 9 on Slido!

7. Summarise all centrality scores and RWR results

# Combine the personalised PageRank scores with the centrality measures
# computed earlier into a single table.
ppr <- as.data.frame(CML_PPR)
centrality_result <- cbind(
  ppr,
  degree_centrality,
  eigenvector_centrality,
  closeness_centrality,
  betweenness_centrality
)

rmarkdown::paged_table(centrality_result) # for the rendered page; don't run this
# In an interactive session, print the table directly instead:
# centrality_result

# Average all columns of the table row by row, then list the rows from the
# highest to the lowest average.
# NOTE(review): the columns look to be on very different scales (PageRank
# scores stay below 1 while the averages reach the hundreds), so the mean is
# presumably dominated by the largest-scale measure - consider rescaling each
# column before averaging; confirm with the workshop author.
centrality_average <- rowMeans(centrality_result)
centrality_average[order(centrality_average, decreasing = TRUE)]
##         SRC    HSP90AA1        GRB2       STAT3       RUNX1        TP53 
## 341.9972840 297.2119916 221.9064844 213.9124043 211.7580173 198.3065843 
##        JAK2        EGFR      PTPN11        SHC1      CREBBP         LCK 
## 197.1653972 158.8530543 143.1535409 100.9856904 100.6095852  96.6832082 
##        ABL1        JAK1         LYN       APAF1        HRAS        PTK2 
##  90.1858716  89.8842974  87.5026900  83.8502125  61.6472394  55.5701431 
##       MAPK1         CBL        CD19       FOXO3      BCL2L1         CRK 
##  54.5336961  53.5486419  51.3430568  47.6403115  43.5270361  37.1411644 
##       HSPA8       RASA1         FN1      CDKN1A        CALR        CRKL 
##  35.1377476  34.7990678  33.7933703  32.3352432  31.6373485  31.6306545 
##        EPOR       ERBB2       HLA-A        BCL2      CTNNB1       GATA1 
##  30.7816259  29.7434115  28.1910560  27.9362553  26.9603651  26.8045398 
##       CTLA4       CASP3        MTOR         PML      FCGR3B       CCND1 
##  26.4203308  26.4013059  26.0064676  26.0038092  26.0026818  24.7487914 
##      STAT5A        CD8A       HSPA4        CSF3         IL3        NPM1 
##  23.9359179  21.9920297  21.2773682  19.2756901  17.1991904  16.5254498 
##       DNMT1        KRAS        RAF1       STAT1       NTRK1       CCNA2 
##  16.3114639  15.9098811  14.7354267  14.1611901  13.4035367  12.3410471 
##       ITGB3        CDK2         EPO         MYC       CEBPB       PTPN6 
##  12.3327058  11.4796843  11.2862506  11.1131402  10.5889089  10.2483478 
##         KIT        BRAF      PDGFRB         IL6      STAT5B         IL2 
##   9.9670742   9.8064910   9.7260036   8.5147268   8.0261429   7.9642641 
##         MPL        ACTB      PIK3CG        GAB2      PDGFRA       CEBPA 
##   7.9552168   7.3190998   7.1425860   6.8267716   6.5182903   6.3778095 
##       ITGB1       SOCS3        FLT3       PTK2B       SOCS1       CREB1 
##   5.6465468   4.5290869   4.4145633   4.1841853   4.0726259   3.8583809 
##        NRAS         HCK       CSF3R       KITLG       SOCS2        AKT1 
##   3.7938362   3.7086104   3.4048830   2.7182147   2.3600427   2.3356624 
##        LEF1       CCND2        CSF2      CDKN1C       PRTN3     BCL2L11 
##   2.1873891   2.1510962   1.9450353   1.9252999   1.6666667   1.6564514 
##         ALK        THPO      INPP5D       PTPN1       NTRK3       SIRPA 
##   1.4782418   1.4717368   1.3566787   1.2448116   1.2393587   1.2366757 
##       MAPK8       MAPK9       GATA2        TAL1        CCL3      DNMT3A 
##   1.2204768   1.2204768   1.2044702   1.2044702   1.1000000   0.9024823 
##     TNFSF10   TNFRSF10B   TNFRSF10A       ELANE         MPO         AXL 
##   0.9000000   0.9000000   0.9000000   0.8500000   0.8500000   0.8326775 
##        GZMB         CD4       KMT2A         NF1       CASP9        MCL1 
##   0.8195264   0.8066735   0.8050045   0.8032113   0.8012473   0.8009391 
##       HOXA9       MEIS1        BMI1       ASXL1        CCR6        CCR7 
##   0.6000000   0.6000000   0.6000000   0.6000000   0.4666667   0.4666667 
##       CD177         BCR        IRF4       IFNA1        TCN1        PTEN 
##   0.4400000   0.4238621   0.4119004   0.4110230   0.4109990   0.4094441 
##        ABL2       PTPRC        CD34      MAPK14        CBFB       MECOM 
##   0.4088621   0.4060894   0.4049635   0.4042989   0.4037387   0.4037387 
##        PLK1        DOK1      ICOSLG       GAPDH    EIF4EBP1       IKZF1 
##   0.4033869   0.4024461   0.4020531   0.4014493   0.4009913   0.4007049 
##        RARA        CD33     KIR3DL1       BIRC3 
##   0.4006593   0.4005738   0.4004816   0.4004206

❓Time for Question 10

Try and answer quiz question 10 on Slido!

8. GeneCards

https://www.genecards.org/cgi-bin/carddisp.pl?gene=HSP90AA1&keywords=HSP90AA1

❓Time for Question 11

Try and answer quiz question 11 on Slido!

9. DrugBank

https://go.drugbank.com/bio_entities/BE0001120

❓Time for Question 12

Try and answer quiz question 12 on Slido!

SessionInfo

# Print the R version, platform, locale, and package versions used for this run.
sessionInfo()
## R version 4.2.2 (2022-10-31 ucrt)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 10 x64 (build 19045)
## 
## Matrix products: default
## 
## locale:
## [1] LC_COLLATE=English_United Kingdom.utf8 
## [2] LC_CTYPE=English_United Kingdom.utf8   
## [3] LC_MONETARY=English_United Kingdom.utf8
## [4] LC_NUMERIC=C                           
## [5] LC_TIME=English_United Kingdom.utf8    
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
## [1] data.table_1.14.8 dplyr_1.1.3       igraph_1.5.1     
## 
## loaded via a namespace (and not attached):
##  [1] rstudioapi_0.15.0 knitr_1.44        magrittr_2.0.3    tidyselect_1.2.0 
##  [5] R6_2.5.1          rlang_1.1.1.9000  fastmap_1.1.1     fansi_1.0.5      
##  [9] tools_4.2.2       xfun_0.40         R.oo_1.25.0       utf8_1.2.3       
## [13] cli_3.6.1         jquerylib_0.1.4   htmltools_0.5.6.1 yaml_2.3.7       
## [17] digest_0.6.31     tibble_3.2.1      lifecycle_1.0.3   bookdown_0.36    
## [21] R.utils_2.12.2    sass_0.4.7        vctrs_0.6.4       curl_5.1.0       
## [25] glue_1.6.2        cachem_1.0.8      evaluate_0.22     rmarkdown_2.25   
## [29] compiler_4.2.2    bslib_0.5.1       pillar_1.9.0      R.methodsS3_1.8.2
## [33] generics_0.1.3    rmdformats_1.0.4  jsonlite_1.8.7    pkgconfig_2.0.3